##################################################
## Human Trafficking Indicators: A New Dataset ##
## Author: Richard W Frank                     ##
## Journal: International Interactions         ##
## Code written: April 2021                    ##
## Software: rstan ver. 2.15.1, R ver. 3.3.1   ##
## Purpose: Prepare data for STAN model fit    ##
## currently updated through 2017 HTI data     ##
#################################################
 
################################################################# 
## setting working directory. All files necessary to run model ##
## should be in this folder.                                   ## 

setwd("~")

rm(list=ls())

library(coda); library(foreign); library(rstan); library(rjags); library(dplyr); library(dbplyr)

## Update new data

id.func <- function(id, n){
  tmp <- rep(1, n)
  if(id!=1){
    for(ii in 2:id){
      tmp <- append(tmp, rep(ii, n))
    }
  }
  tmp
}

# set time start variable
systime1 <- Sys.time()
print(Sys.time() - systime1)

STARTYEAR <- 2001
 
data <-read.csv("HT_irt_merged1.csv")
nrow(data)

#data <- na.omit(data) 
data <- subset(data, !is.na(dest) | !is.na(pdest) | !is.na(ldest) | !is.na(ddest) | 
                 !is.na(dsdest) | !is.na(cpdest) | !is.na(cldest) | !is.na(u_vict_5) | !is.na(c_labor_5) | 
                 !is.na(c_sex_5) | !is.na(c_vic_5))
nrow(data)
n <- nrow(data)
data$ccode_year <- paste(data$ccode, data$year, sep="-")
id <- factor(data$ccode_year)
id <- as.numeric(id)
data$id <- id 
prev_id <- numeric(length(id))
for(ii in 1:length(id)){
  tmp.year <- data$year[which(ii == id) ]
  tmp.ccode <- data$ccode[which(ii == id) ]
  
  if(any(((tmp.year-1) == data$year) & (tmp.ccode == data$ccode))){
    prev_id[ii] <- id[((tmp.year-1) == data$year) & (tmp.ccode == data$ccode)]
    
  } else {
    prev_id[ii] <- 0
  }
  
}

year <- data$year - min(data$year) + 1

# data$country <- as.numeric(as.factor(data$COW))
country <- as.numeric(as.factor(data$ccode))

############################################# 
## Split between standards and event based ##
############################################# 


### Standards:  dest pdest ldest ddest dsdest cpdest cldest
standards <- c("dest","pdest", "ldest", "ddest", "dsdest",
               "cpdest", "cldest")

## standards has to be separated because of the varying cutpoints
y_dest <- data$dest
latent_dest_id <-id[!is.na(y_dest)]
year_dest_id <- year[!is.na(y_dest)]
y_dest <- y_dest[!is.na(y_dest)]
N_dest <- length(y_dest)
table(y_dest)
y_dest <- y_dest + 1

y_pdest <- data$pdest
latent_pdest_id <-id[!is.na(y_pdest)]
year_pdest_id <- year[!is.na(y_pdest)]
y_pdest <- y_pdest[!is.na(y_pdest)]
N_pdest <- length(y_pdest)
table(y_pdest)
y_pdest <- y_pdest + 1

y_ldest <- data$ldest
latent_ldest_id <-id[!is.na(y_ldest)]
year_ldest_id <- year[!is.na(y_ldest)]
y_ldest <- y_ldest[!is.na(y_ldest)]
N_ldest <- length(y_ldest)
table(y_ldest)
y_ldest <- y_ldest + 1

y_ddest <- data$ddest
latent_ddest_id <-id[!is.na(y_ddest)]
year_ddest_id <- year[!is.na(y_ddest)]
y_ddest <- y_ddest[!is.na(y_ddest)]
N_ddest <- length(y_ddest)
table(y_ddest)

y_dsdest <- data$dsdest
latent_dsdest_id <-id[!is.na(y_dsdest)]
year_dsdest_id <- year[!is.na(y_dsdest)]
y_dsdest <- y_dsdest[!is.na(y_dsdest)]
N_dsdest <- length(y_dsdest)
table(y_dsdest)

y_cpdest <- data$cpdest
latent_cpdest_id <-id[!is.na(y_cpdest)]
year_cpdest_id <- year[!is.na(y_cpdest)]
y_cpdest <- y_cpdest[!is.na(y_cpdest)]
N_cpdest <- length(y_cpdest)
table(y_cpdest)

y_cldest <- data$cldest
latent_cldest_id <-id[!is.na(y_cldest)]
year_cldest_id <- year[!is.na(y_cldest)]
y_cldest <- y_cldest[!is.na(y_cldest)]
N_cldest <- length(y_cldest)
table(y_cldest)


### Events:  u_vict_5 c_labor_5 c_sex_5 c_vic_5  

events <- c("u_vict_5", "c_labor_5", "c_sex_5", "c_vic_5")
y.events <- data[,colnames(data) %in% events]
n.bins.events <- apply(y.events, 2, function(x) max(x, na.rm=T) - min(x, na.rm=T) + 1)

y_events_5 <- y.events[,n.bins.events == 5]
J_events_5 <- sum(n.bins.events==5)
y_events_5 <- as.vector(y_events_5)
item_events_id_5 <- id.func(J_events_5, nrow(data))
item_events_id_5 <- item_events_id_5[!is.na(y_events_5)]
latent_events_id_5 <- rep(id, J_events_5)[!is.na(y_events_5)]
# year_events_id_5 <- rep(year, J_events_5)[!is.na(y_events_5)]
y_events_5 <- y_events_5[!is.na(y_events_5)]
N_events_5 <- length(y_events_5)
table(y_events_5, item_events_id_5)
 
### Standards:  dest pdest ldest ddest dsdest cpdest cldest
### Events:  u_vict_5 c_labor_5 c_sex_5 c_vic_5  


stan.data <- list(N=nrow(data), prev_id=prev_id,
                  N_dest=N_dest, 
                  y_dest=y_dest, 
                  latent_dest_id=latent_dest_id, 
                  year_dest_id=year_dest_id,
                  N_pdest=N_pdest, 
                  y_pdest=y_pdest, 
                  latent_pdest_id=latent_pdest_id, 
                  year_pdest_id=year_pdest_id,
                  N_ldest=N_ldest, 
                  y_ldest=y_ldest,
                  latent_ldest_id=latent_ldest_id, 
                  year_ldest_id=year_ldest_id,
                  N_ddest=N_ddest, 
                  y_ddest=y_ddest, 
                  latent_ddest_id=latent_ddest_id, 
                  year_ddest_id=year_ddest_id,
                  N_dsdest=N_dsdest, 
                  y_dsdest=y_dsdest, 
                  latent_dsdest_id=latent_dsdest_id, 
                  year_dsdest_id=year_dsdest_id,
                  N_cpdest=N_cpdest, 
                  y_cpdest=y_cpdest, 
                  latent_cpdest_id=latent_cpdest_id, 
                  year_cpdest_id=year_cpdest_id,
                  N_cldest=N_cldest, 
                  y_cldest=y_cldest, 
                  latent_cldest_id=latent_cldest_id, 
                  year_cldest_id=year_cldest_id,
                  J_events_5=J_events_5, 
                  N_events_5=N_events_5, 
                  y_events_5=y_events_5, 
                  item_events_id_5=item_events_id_5, 
                  latent_events_id_5=latent_events_id_5)


setwd("~")
save.image("HTI_stan_data_prepped.RData")
